{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm_notebook as tqdm\n",
    "import preprocessor as p\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %run ../twitter15/twitter15.ipynb\n",
    "%run ../twitter16/twitter16_text_processing.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torchvision.transforms as transforms\n",
    "import torchvision.datasets as dsets\n",
    "from torch.autograd import Variable\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence\n",
    "import torch.optim as optim\n",
    "import torch.nn.functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0104 09:58:08.245404 140687802902272 file_utils.py:39] PyTorch version 1.3.0 available.\n",
      "I0104 09:58:08.559976 140687802902272 modeling_xlnet.py:194] Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .\n"
     ]
    }
   ],
   "source": [
    "from transformers import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "import spacy\n",
    "from tqdm import tqdm, tqdm_notebook, tnrange\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0104 09:58:09.961360 140687802902272 tokenization_utils.py:374] loading file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /home/nikhil.pinnaparaju/.cache/torch/transformers/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084\n"
     ]
    }
   ],
   "source": [
    "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pad_data(s, maxlen=30):\n",
    "        padded = np.zeros((maxlen,), dtype=np.int64)\n",
    "        if len(s) > maxlen: padded[:] = s[:maxlen]\n",
    "        else: padded[:len(s)] = s\n",
    "        return padded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = {'true':0,'false':1,'unverified':2,'non-rumor':3}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataids = list(twitter15_text.keys())\n",
    "X = [ twitter15_text[x] for x in dataids ]\n",
    "y = [ labels[twitter15_labels[x]] for x in dataids]\n",
    "sent2idx = [torch.tensor(tokenizer.encode(x, add_special_tokens=True)) for x in X]\n",
    "padded = [ pad_data(x) for x in sent2idx ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "attention_masks = []\n",
    "\n",
    "# For each sentence...\n",
    "for sent in padded:\n",
    "    \n",
    "    # Create the attention mask.\n",
    "    #   - If a token ID is 0, then it's padding, set the mask to 0.\n",
    "    #   - If a token ID is > 0, then it's a real token, set the mask to 1.\n",
    "    att_mask = [int(token_id > 0) for token_id in sent]\n",
    "    \n",
    "    # Store the attention mask for this sentence.\n",
    "    attention_masks.append(att_mask)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, y, \n",
    "                                                            random_state=2018, test_size=0.1)\n",
    "# Do the same for the masks.\n",
    "train_masks, validation_masks, _, _ = train_test_split(attention_masks, y,\n",
    "                                             random_state=2018, test_size=0.1)\n",
    "\n",
    "train_inputs = torch.tensor(train_inputs)\n",
    "validation_inputs = torch.tensor(validation_inputs)\n",
    "\n",
    "train_labels = torch.tensor(train_labels)\n",
    "validation_labels = torch.tensor(validation_labels)\n",
    "\n",
    "train_masks = torch.tensor(train_masks)\n",
    "validation_masks = torch.tensor(validation_masks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler\n",
    "\n",
    "# The DataLoader needs to know our batch size for training, so we specify it \n",
    "# here.\n",
    "# For fine-tuning BERT on a specific task, the authors recommend a batch size of\n",
    "# 16 or 32.\n",
    "\n",
    "batch_size = 32\n",
    "\n",
    "# Create the DataLoader for our training set.\n",
    "train_data = TensorDataset(train_inputs, train_masks, train_labels)\n",
    "train_sampler = RandomSampler(train_data)\n",
    "train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n",
    "\n",
    "# Create the DataLoader for our validation set.\n",
    "validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)\n",
    "validation_sampler = SequentialSampler(validation_data)\n",
    "validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "23\n"
     ]
    }
   ],
   "source": [
    "print(len(train_dataloader))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "if torch.cuda.is_available():\n",
    "    device = 'cuda:2'\n",
    "else:\n",
    "    device = 'cpu'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "I0104 09:58:11.479258 140687802902272 configuration_utils.py:151] loading configuration file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-config.json from cache at /home/nikhil.pinnaparaju/.cache/torch/transformers/4dad0251492946e18ac39290fcfe91b89d370fee250efe9521476438fe8ca185.bf3b9ea126d8c0001ee8a1e8b92229871d06d36d8808208cc2449280da87785c\n",
      "I0104 09:58:11.481143 140687802902272 configuration_utils.py:168] Model config {\n",
      "  \"attention_probs_dropout_prob\": 0.1,\n",
      "  \"finetuning_task\": null,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 768,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 3072,\n",
      "  \"layer_norm_eps\": 1e-12,\n",
      "  \"max_position_embeddings\": 512,\n",
      "  \"num_attention_heads\": 12,\n",
      "  \"num_hidden_layers\": 12,\n",
      "  \"num_labels\": 4,\n",
      "  \"output_attentions\": false,\n",
      "  \"output_hidden_states\": false,\n",
      "  \"output_past\": true,\n",
      "  \"pruned_heads\": {},\n",
      "  \"torchscript\": false,\n",
      "  \"type_vocab_size\": 2,\n",
      "  \"use_bfloat16\": false,\n",
      "  \"vocab_size\": 30522\n",
      "}\n",
      "\n",
      "I0104 09:58:12.397128 140687802902272 modeling_utils.py:337] loading weights file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-pytorch_model.bin from cache at /home/nikhil.pinnaparaju/.cache/torch/transformers/aa1ef1aede4482d0dbcd4d52baad8ae300e60902e88fcb0bebdec09afd232066.36ca03ab34a1a5d5fa7bc3d03d55c4fa650fed07220e2eeebc06ce58d0e9a157\n",
      "I0104 09:58:15.683848 140687802902272 modeling_utils.py:405] Weights of BertForSequenceClassification not initialized from pretrained model: ['classifier.weight', 'classifier.bias']\n",
      "I0104 09:58:15.684865 140687802902272 modeling_utils.py:408] Weights from pretrained model not used in BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']\n"
     ]
    }
   ],
   "source": [
    "# model = BertForSequenceClassification.from_pretrained('bert-base-uncased').to(device)\n",
    "\n",
    "model = BertForSequenceClassification.from_pretrained(\n",
    "    \"bert-base-uncased\", # Use the 12-layer BERT model, with an uncased vocab.\n",
    "    num_labels = 4, # The number of output labels--2 for binary classification.\n",
    "                    # You can increase this for multi-class tasks.   \n",
    ").to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = AdamW(model.parameters(),\n",
    "                  lr = 2e-5, # args.learning_rate - default is 5e-5, our notebook had 2e-5\n",
    "                  eps = 1e-8 # args.adam_epsilon  - default is 1e-8.\n",
    "                )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of training epochs (authors recommend between 2 and 4)\n",
    "epochs = 4\n",
    "\n",
    "# Total number of training steps is number of batches * number of epochs.\n",
    "total_steps = len(train_dataloader) * epochs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to calculate the accuracy of our predictions vs labels\n",
    "def flat_accuracy(preds, labels):\n",
    "    pred_flat = np.argmax(preds, axis=1).flatten()\n",
    "    labels_flat = labels.flatten()\n",
    "    return np.sum(pred_flat == labels_flat) / len(labels_flat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "import datetime\n",
    "\n",
    "def format_time(elapsed):\n",
    "    '''\n",
    "    Takes a time in seconds and returns a string hh:mm:ss\n",
    "    '''\n",
    "    # Round to the nearest second.\n",
    "    elapsed_rounded = int(round((elapsed)))\n",
    "    \n",
    "    # Format as hh:mm:ss\n",
    "    return str(datetime.timedelta(seconds=elapsed_rounded))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "======== Epoch 1 / 4 ========\n",
      "Training...\n",
      "\n",
      "  Average training loss: 1.37\n",
      "  Training epcoh took: 0:00:05\n",
      "\n",
      "Running Validation...\n",
      "acc: 0.5609756097560976 recall 0.5357894736842106 prec: 0.6330267558528428 f1: 0.48965179968701095\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.65      0.89      0.76        19\n",
      "           1       0.48      0.88      0.62        25\n",
      "           2       0.60      0.16      0.25        19\n",
      "           3       0.80      0.21      0.33        19\n",
      "\n",
      "   micro avg       0.56      0.56      0.56        82\n",
      "   macro avg       0.63      0.54      0.49        82\n",
      "weighted avg       0.62      0.56      0.50        82\n",
      "\n",
      "  Accuracy: 0.56\n",
      "  Validation took: 0:00:00\n",
      "\n",
      "======== Epoch 2 / 4 ========\n",
      "Training...\n",
      "\n",
      "  Average training loss: 1.15\n",
      "  Training epcoh took: 0:00:04\n",
      "\n",
      "Running Validation...\n",
      "acc: 0.5609756097560976 recall 0.5578947368421052 prec: 0.5534722222222223 f1: 0.5480374758599091\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.56      0.79      0.65        19\n",
      "           1       0.62      0.60      0.61        25\n",
      "           2       0.50      0.42      0.46        19\n",
      "           3       0.53      0.42      0.47        19\n",
      "\n",
      "   micro avg       0.56      0.56      0.56        82\n",
      "   macro avg       0.55      0.56      0.55        82\n",
      "weighted avg       0.56      0.56      0.55        82\n",
      "\n",
      "  Accuracy: 0.56\n",
      "  Validation took: 0:00:00\n",
      "\n",
      "======== Epoch 3 / 4 ========\n",
      "Training...\n",
      "\n",
      "  Average training loss: 0.92\n",
      "  Training epcoh took: 0:00:04\n",
      "\n",
      "Running Validation...\n",
      "acc: 0.6585365853658537 recall 0.6568421052631579 prec: 0.6668002392344498 f1: 0.657440264103198\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.79      0.79      0.79        19\n",
      "           1       0.77      0.68      0.72        25\n",
      "           2       0.48      0.63      0.55        19\n",
      "           3       0.62      0.53      0.57        19\n",
      "\n",
      "   micro avg       0.66      0.66      0.66        82\n",
      "   macro avg       0.67      0.66      0.66        82\n",
      "weighted avg       0.67      0.66      0.66        82\n",
      "\n",
      "  Accuracy: 0.67\n",
      "  Validation took: 0:00:00\n",
      "\n",
      "======== Epoch 4 / 4 ========\n",
      "Training...\n",
      "\n",
      "  Average training loss: 0.67\n",
      "  Training epcoh took: 0:00:04\n",
      "\n",
      "Running Validation...\n",
      "acc: 0.7195121951219512 recall 0.7194736842105263 prec: 0.7260869565217392 f1: 0.7197344322344323\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.85      0.89      0.87        19\n",
      "           1       0.78      0.72      0.75        25\n",
      "           2       0.52      0.63      0.57        19\n",
      "           3       0.75      0.63      0.69        19\n",
      "\n",
      "   micro avg       0.72      0.72      0.72        82\n",
      "   macro avg       0.73      0.72      0.72        82\n",
      "weighted avg       0.73      0.72      0.72        82\n",
      "\n",
      "  Accuracy: 0.73\n",
      "  Validation took: 0:00:00\n",
      "\n",
      "Training complete!\n"
     ]
    }
   ],
   "source": [
    "import random\n",
    "\n",
    "# This training code is based on the `run_glue.py` script here:\n",
    "# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128\n",
    "\n",
    "\n",
    "# Set the seed value all over the place to make this reproducible.\n",
    "seed_val = 42\n",
    "\n",
    "random.seed(seed_val)\n",
    "np.random.seed(seed_val)\n",
    "torch.manual_seed(seed_val)\n",
    "torch.cuda.manual_seed_all(seed_val)\n",
    "\n",
    "# Store the average loss after each epoch so we can plot them.\n",
    "loss_values = []\n",
    "\n",
    "# For each epoch...\n",
    "for epoch_i in range(0, epochs):\n",
    "    \n",
    "    # ========================================\n",
    "    #               Training\n",
    "    # ========================================\n",
    "    \n",
    "    # Perform one full pass over the training set.\n",
    "\n",
    "    print(\"\")\n",
    "    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))\n",
    "    print('Training...')\n",
    "\n",
    "    # Measure how long the training epoch takes.\n",
    "    t0 = time.time()\n",
    "\n",
    "    # Reset the total loss for this epoch.\n",
    "    total_loss = 0\n",
    "\n",
    "    # Put the model into training mode. Don't be mislead--the call to \n",
    "    # `train` just changes the *mode*, it doesn't *perform* the training.\n",
    "    # `dropout` and `batchnorm` layers behave differently during training\n",
    "    # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)\n",
    "    model.train()\n",
    "\n",
    "    # For each batch of training data...\n",
    "    for step, batch in enumerate(train_dataloader):\n",
    "\n",
    "        # Progress update every 40 batches.\n",
    "        if step % 40 == 0 and not step == 0:\n",
    "            # Calculate elapsed time in minutes.\n",
    "            elapsed = format_time(time.time() - t0)\n",
    "            \n",
    "            # Report progress.\n",
    "            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))\n",
    "\n",
    "        # Unpack this training batch from our dataloader. \n",
    "        #\n",
    "        # As we unpack the batch, we'll also copy each tensor to the GPU using the \n",
    "        # `to` method.\n",
    "        #\n",
    "        # `batch` contains three pytorch tensors:\n",
    "        #   [0]: input ids \n",
    "        #   [1]: attention masks\n",
    "        #   [2]: labels \n",
    "        b_input_ids = batch[0].to(device)\n",
    "        b_input_mask = batch[1].to(device)\n",
    "        b_labels = batch[2].to(device)\n",
    "\n",
    "        # Always clear any previously calculated gradients before performing a\n",
    "        # backward pass. PyTorch doesn't do this automatically because \n",
    "        # accumulating the gradients is \"convenient while training RNNs\". \n",
    "        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)\n",
    "        model.zero_grad()        \n",
    "\n",
    "        # Perform a forward pass (evaluate the model on this training batch).\n",
    "        # This will return the loss (rather than the model output) because we\n",
    "        # have provided the `labels`.\n",
    "        # The documentation for this `model` function is here: \n",
    "        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
    "        outputs = model(b_input_ids, \n",
    "                    token_type_ids=None, \n",
    "                    attention_mask=b_input_mask, \n",
    "                    labels=b_labels)\n",
    "        \n",
    "        # The call to `model` always returns a tuple, so we need to pull the \n",
    "        # loss value out of the tuple.\n",
    "        loss = outputs[0]\n",
    "\n",
    "        # Accumulate the training loss over all of the batches so that we can\n",
    "        # calculate the average loss at the end. `loss` is a Tensor containing a\n",
    "        # single value; the `.item()` function just returns the Python value \n",
    "        # from the tensor.\n",
    "        total_loss += loss.item()\n",
    "\n",
    "        # Perform a backward pass to calculate the gradients.\n",
    "        loss.backward()\n",
    "\n",
    "        # Clip the norm of the gradients to 1.0.\n",
    "        # This is to help prevent the \"exploding gradients\" problem.\n",
    "        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)\n",
    "\n",
    "        # Update parameters and take a step using the computed gradient.\n",
    "        # The optimizer dictates the \"update rule\"--how the parameters are\n",
    "        # modified based on their gradients, the learning rate, etc.\n",
    "        optimizer.step()\n",
    "\n",
    "        # Update the learning rate.\n",
    "\n",
    "    # Calculate the average loss over the training data.\n",
    "    avg_train_loss = total_loss / len(train_dataloader)            \n",
    "    \n",
    "    # Store the loss value for plotting the learning curve.\n",
    "    loss_values.append(avg_train_loss)\n",
    "\n",
    "    print(\"\")\n",
    "    print(\"  Average training loss: {0:.2f}\".format(avg_train_loss))\n",
    "    print(\"  Training epcoh took: {:}\".format(format_time(time.time() - t0)))\n",
    "        \n",
    "    # ========================================\n",
    "    #               Validation\n",
    "    # ========================================\n",
    "    # After the completion of each training epoch, measure our performance on\n",
    "    # our validation set.\n",
    "\n",
    "    print(\"\")\n",
    "    print(\"Running Validation...\")\n",
    "\n",
    "    t0 = time.time()\n",
    "\n",
    "    # Put the model in evaluation mode--the dropout layers behave differently\n",
    "    # during evaluation.\n",
    "    model.eval()\n",
    "\n",
    "    # Tracking variables \n",
    "    allLabels = []\n",
    "    allPreds = []\n",
    "    eval_loss, eval_accuracy = 0, 0\n",
    "    nb_eval_steps, nb_eval_examples = 0, 0\n",
    "\n",
    "    # Evaluate data for one epoch\n",
    "    for batch in validation_dataloader:\n",
    "        \n",
    "        # Add batch to GPU\n",
    "        batch = tuple(t.to(device) for t in batch)\n",
    "        \n",
    "        # Unpack the inputs from our dataloader\n",
    "        b_input_ids, b_input_mask, b_labels = batch\n",
    "        \n",
    "        # Telling the model not to compute or store gradients, saving memory and\n",
    "        # speeding up validation\n",
    "        with torch.no_grad():        \n",
    "\n",
    "            # Forward pass, calculate logit predictions.\n",
    "            # This will return the logits rather than the loss because we have\n",
    "            # not provided labels.\n",
    "            # token_type_ids is the same as the \"segment ids\", which \n",
    "            # differentiates sentence 1 and 2 in 2-sentence tasks.\n",
    "            # The documentation for this `model` function is here: \n",
    "            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification\n",
    "            outputs = model(b_input_ids, \n",
    "                            token_type_ids=None, \n",
    "                            attention_mask=b_input_mask)\n",
    "        \n",
    "        # Get the \"logits\" output by the model. The \"logits\" are the output\n",
    "        # values prior to applying an activation function like the softmax.\n",
    "        logits = outputs[0]\n",
    "\n",
    "        # Move logits and labels to CPU\n",
    "        logits = logits.detach().cpu().numpy()\n",
    "        label_ids = b_labels.to('cpu').numpy()\n",
    "        allPreds += torch.max(torch.tensor(logits),1)[1].numpy().tolist()\n",
    "        allLabels += label_ids.tolist()\n",
    "#         print(logits)\n",
    "        \n",
    "        # Calculate the accuracy for this batch of test sentences.\n",
    "        tmp_eval_accuracy = flat_accuracy(logits, label_ids)\n",
    "        \n",
    "        # Accumulate the total accuracy.\n",
    "        eval_accuracy += tmp_eval_accuracy\n",
    "\n",
    "        # Track the number of batches\n",
    "        nb_eval_steps += 1\n",
    "\n",
    "    valacc = accuracy_score(allLabels, allPreds)\n",
    "    recscore = recall_score(allLabels, allPreds,average='macro')\n",
    "    precscore = precision_score(allLabels, allPreds,average='macro')\n",
    "    f1score = f1_score(allLabels, allPreds,average='macro')\n",
    "    cr = classification_report(allLabels, allPreds)\n",
    "\n",
    "    print(f'acc: {valacc} recall {recscore} prec: {precscore} f1: {f1score}')\n",
    "    print(cr)\n",
    "    \n",
    "    print(\"  Accuracy: {0:.2f}\".format(eval_accuracy/nb_eval_steps))\n",
    "    print(\"  Validation took: {:}\".format(format_time(time.time() - t0)))\n",
    "\n",
    "print(\"\")\n",
    "print(\"Training complete!\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "fakenews",
   "language": "python",
   "name": "fakenews"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
